- does parent SES cause better grades?
- r(gpa, ses) = .33, b = .41
- potential confound of peer relationships
- r(ses, peer) = .29
- r(gpa, peer) = .37
\[ \hat{Y} = b_{0} + b_{1}X_{1} + b_{2}X_{2}+...+b_{p}X_{p} \]
\[R^2 = \frac{SS_{reggression}} {SS_{Y}} = \frac{s_{\hat{Y}}^2}{s_{Y}} \]
-Can be thought of as overlapping Venn diagrams
\[ sr = r_{y(1.2)} = \frac{r_{Y1}-r_{Y2}r_{12} }{\sqrt{1-r_{12}^2}} \] \[ sr^2 = R_{Y.12}^2 - r_{Y2}^2 \]
\[ pr = r_{y1.2} = \frac{r_{Y1}-r_{Y2}r_{12} }{\sqrt{1-r_{Y2}^2}\sqrt{1-r_{12}^2}} \]
\[ sr = r_{y(1.2)} = \frac{r_{Y1}-r_{Y2}r_{Y12} }{\sqrt{1-r_{12}^2}} \]
\[ pr^2 = \frac{R_{Y.12}^2 - r_{Y2}^2}{1-r_{Y2}^2} \]
\[ sr^2 = R_{Y.12}^2 - r_{Y2}^2 \]
\[ \hat{Y} = b_{0} + b_{1}X_{1} + b_{2}X_{2}+...+b_{p}X_{p} \]
- intercept is when all predictors = 0
- regression coefficients are partial regression coefficients
- predicted change in y for a 1 unit change in x, holding all other predictors constant
mr.model <- lm(Stress ~ Support + Anxiety, data = Multipleregression) summary(mr.model)
## ## Call: ## lm(formula = Stress ~ Support + Anxiety, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1958 -0.8994 -0.1370 0.9990 3.6995 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.31587 0.85596 -0.369 0.712792 ## Support 0.40618 0.05115 7.941 1.49e-12 *** ## Anxiety 0.25609 0.06740 3.799 0.000234 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.519 on 115 degrees of freedom ## Multiple R-squared: 0.3556, Adjusted R-squared: 0.3444 ## F-statistic: 31.73 on 2 and 115 DF, p-value: 1.062e-11
library(psych) describe(Multipleregression$Stress)
## vars n mean sd median trimmed mad min max range skew kurtosis ## X1 1 118 5.18 1.88 5.27 5.17 1.65 0.62 10.32 9.71 0.08 0.22 ## se ## X1 0.17
library(visreg) visreg2d(mr.model,"Support", "Anxiety", plot.type = "persp")
\[b_{1}^* = \frac{r_{Y1}-r_{Y2}r_{12}}{1-r_{12}^2}\]
\[b_{2}^* = \frac{r_{Y2}-r_{Y1}r_{12}}{1-r_{12}^2}\]
\[b_{1}^* = \frac{r_{Y1}-r_{Y2}r_{12}}{1-r_{12}^2}\]
\[ sr = r_{y(1.2)} = \frac{r_{Y1}-r_{Y2}r_{Y12} }{\sqrt{1-r_{12}^2}} \]
\[b_{1} = b_{1}^*\frac{s_{Y}}{s_{X1}} \]
\[b_{1}^* = b_{1}\frac{s_{X1}}{s_{Y}} \]
\[b_{0} = \bar{Y} - b_{1}\bar{X_{1}} - b_{2}\bar{X_{2}} \]
\[ \hat{Y} = b_{0} + b_{1}X_{1} + b_{2}X_{2} \]
\[ R = \sqrt{b_{1}^*r_{Y1} + b_{2}^*r_{Y2}} \] \[ R^2 = {b_{1}^*r_{Y1} + b_{2}^*r_{Y2}} \]
same as before
\[ \frac{SS_{regression}}{SS_{Y}} = R^2 \] \[ {SS_{regression}} = R^2({SS_{Y})} \]
\[ {SS_{residual}} = (1- R^2){SS_{Y}} \]
\[R_{A}^2 = 1 - (1 -R^2)\frac{n-1}{n-p-1} \]
anova(mr.model)
## Analysis of Variance Table ## ## Response: Stress ## Df Sum Sq Mean Sq F value Pr(>F) ## Support 1 113.151 113.151 49.028 1.807e-10 *** ## Anxiety 1 33.314 33.314 14.435 0.0002336 *** ## Residuals 115 265.407 2.308 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mr.model)
## ## Call: ## lm(formula = Stress ~ Support + Anxiety, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1958 -0.8994 -0.1370 0.9990 3.6995 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.31587 0.85596 -0.369 0.712792 ## Support 0.40618 0.05115 7.941 1.49e-12 *** ## Anxiety 0.25609 0.06740 3.799 0.000234 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.519 on 115 degrees of freedom ## Multiple R-squared: 0.3556, Adjusted R-squared: 0.3444 ## F-statistic: 31.73 on 2 and 115 DF, p-value: 1.062e-11
\[ H_{0}: \beta_{X}= 0 \] \[ H_{1}: \beta_{X} \neq 0 \]
\[ se_{b} = \frac{s_{Y}}{s_{X}}{\sqrt{\frac {1-r_{xy}^2}{n-2}}} \]
\[ se_{b} = \frac{s_{Y}}{s_{X}}{\sqrt{\frac {1-R_{Y\hat{Y}}^2}{n-p-1}}} \sqrt{\frac {1}{1-R_{i.jkl...p}^2}}\]
\[ se_{b} = \frac{s_{Y}}{s_{X}}{\sqrt{\frac {1-R_{Y\hat{Y}}^2}{n-p-1}}} \sqrt{\frac {1}{1-R_{i.jkl...p}^2}}\]
- what cannot be explained in Xi by other predictors
- Large tolerance (little overlap) means standard error will be small.
- what does this mean for including a lot of variables in your model?
\[R_{Y.1234...p}^2 = r_{Y1}^2 + r_{Y(2.1)}^2 + r_{Y(3.21)}^2 + r_{Y(4.321)}^2 + ... \]
m.1 <- lm(Stress ~ Support, data = Multipleregression) m.2 <- lm(Stress ~ Support + Anxiety, data = Multipleregression) anova(m.1, m.2)
## Analysis of Variance Table ## ## Model 1: Stress ~ Support ## Model 2: Stress ~ Support + Anxiety ## Res.Df RSS Df Sum of Sq F Pr(>F) ## 1 116 298.72 ## 2 115 265.41 1 33.314 14.435 0.0002336 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m.1)
## Analysis of Variance Table ## ## Response: Stress ## Df Sum Sq Mean Sq F value Pr(>F) ## Support 1 113.15 113.151 43.939 1.12e-09 *** ## Residuals 116 298.72 2.575 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(m.2)
## Analysis of Variance Table ## ## Response: Stress ## Df Sum Sq Mean Sq F value Pr(>F) ## Support 1 113.151 113.151 49.028 1.807e-10 *** ## Anxiety 1 33.314 33.314 14.435 0.0002336 *** ## Residuals 115 265.407 2.308 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ## Call: ## lm(formula = Stress ~ Support + Anxiety, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.1958 -0.8994 -0.1370 0.9990 3.6995 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -0.31587 0.85596 -0.369 0.712792 ## Support 0.40618 0.05115 7.941 1.49e-12 *** ## Anxiety 0.25609 0.06740 3.799 0.000234 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.519 on 115 degrees of freedom ## Multiple R-squared: 0.3556, Adjusted R-squared: 0.3444 ## F-statistic: 31.73 on 2 and 115 DF, p-value: 1.062e-11
## ## Call: ## lm(formula = Stress ~ Support, data = Multipleregression) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.8215 -1.2145 -0.1796 1.0806 3.4326 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 2.56046 0.42189 6.069 1.66e-08 *** ## Support 0.30006 0.04527 6.629 1.12e-09 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.605 on 116 degrees of freedom ## Multiple R-squared: 0.2747, Adjusted R-squared: 0.2685 ## F-statistic: 43.94 on 1 and 116 DF, p-value: 1.12e-09
m.0 <- lm(Stress ~ 1, data = Multipleregression) m.1 <- lm(Stress ~ Support, data = Multipleregression) anova(m.0, m.1)
## Analysis of Variance Table ## ## Model 1: Stress ~ 1 ## Model 2: Stress ~ Support ## Res.Df RSS Df Sum of Sq F Pr(>F) ## 1 117 411.87 ## 2 116 298.72 1 113.15 43.939 1.12e-09 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
model.1 <- lm(drugs ~ group, data = one.way ) summary(model.1)
## ## Call: ## lm(formula = drugs ~ group, data = one.way) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.5015 -0.1524 -0.0613 -0.0613 4.0619 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 0.32159 0.08964 3.588 0.000402 *** ## group1 -0.42402 0.10236 -4.142 4.73e-05 *** ## group2 -0.33289 0.11991 -2.776 0.005923 ** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.6013 on 247 degrees of freedom ## (3 observations deleted due to missingness) ## Multiple R-squared: 0.06498, Adjusted R-squared: 0.05741 ## F-statistic: 8.583 on 2 and 247 DF, p-value: 0.0002491
library(dplyr)
(one.way %>%
group_by(group) %>%
filter(!is.na(drugs)) %>%
summarise(mean(drugs)))
## # A tibble: 3 x 2 ## group `mean(drugs)` ## <fctr> <dbl> ## 1 0 0.32159423 ## 2 1 -0.10242087 ## 3 2 -0.01129146
contrasts(one.way$group)
## 1 2 ## 0 0 0 ## 1 1 0 ## 2 0 1
Multipleregression$group <- as.factor(Multipleregression$group) contrasts(Multipleregression$group)
## Tx ## Control 0 ## Tx 1
contr.treatment(4)
## 2 3 4 ## 1 0 0 0 ## 2 1 0 0 ## 3 0 1 0 ## 4 0 0 1
contr.sum(4)
## [,1] [,2] [,3] ## 1 1 0 0 ## 2 0 1 0 ## 3 0 0 1 ## 4 -1 -1 -1
contrasts(one.way$group) <- contr.sum(3) model.2 <- lm(drugs ~ group, data = one.way ) tidy(model.2)
## term estimate std.error statistic p.value ## 1 (Intercept) 0.06929397 0.04323336 1.602789 0.1102591875 ## 2 group1 0.25230027 0.06743556 3.741353 0.0002276286 ## 3 group2 -0.17171484 0.05180262 -3.314791 0.0010548126
library(psych) describe(one.way$drugs)
## vars n mean sd median trimmed mad min max range skew kurtosis ## X1 1 250 -0.01 0.62 -0.16 -0.15 0 -0.18 4.38 4.56 5.11 26.82 ## se ## X1 0.04
table(one.way$group)
## ## 0 1 2 ## 45 150 58
anova(model.2)
## Analysis of Variance Table ## ## Response: drugs ## Df Sum Sq Mean Sq F value Pr(>F) ## group 2 6.207 3.10337 8.5826 0.0002491 *** ## Residuals 247 89.312 0.36159 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# relevel() as a way to change which group is coded what, or: contrasts(one.way$group) <- contr.treatment(3, base = 2) model.3 <- lm(drugs ~ group, data = one.way ) tidy(model.3)
## term estimate std.error statistic p.value ## 1 (Intercept) -0.10242087 0.04942837 -2.0721070 3.929334e-02 ## 2 group1 0.42401511 0.10236434 4.1422150 4.726216e-05 ## 3 group3 0.09112942 0.09373803 0.9721713 3.319164e-01
contrasts(one.way$group) <- contr.treatment(3, base = 3) model.4 <- lm(drugs ~ group, data = one.way ) tidy(model.4)
## term estimate std.error statistic p.value ## 1 (Intercept) -0.01129146 0.07964706 -0.1417687 0.887378302 ## 2 group1 0.33288569 0.11991225 2.7760774 0.005923213 ## 3 group2 -0.09112942 0.09373803 -0.9721713 0.331916401